import pytesseract
from PIL import Image
import re

# OCR an invoice image with Tesseract and locate the key invoice fields in the
# recognized text.

# (1) Configure the path of the installed Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = r'D:\Program Files\Tesseract-OCR\tesseract.exe'

# (2) Run OCR with the Simplified Chinese language data. The image is opened in
# a ``with`` block so the underlying file handle is closed as soon as OCR is
# done instead of lingering until garbage collection.
with Image.open(r'D:\桌面\一级文件夹\二级文件夹\05073978.png') as invoice_image:
    text = pytesseract.image_to_string(invoice_image, lang='chi_sim')

# (3) Search the recognized text for each field. Every result below is an
# ``re.Match`` object, or ``None`` when the label was not found; the field value
# itself is capture group 1, e.g.
# ``invoice_code.group(1) if invoice_code else None``.
# Each label may be followed by an ASCII or full-width colon and whitespace.
invoice_code = re.search(r'发票代码[:：]?\s*(\d{10})', text)  # exactly 10 digits
invoice_number = re.search(r'发票号码[:：]?\s*(\d+)', text)
invoice_date = re.search(r'开票日期[:：]?\s*(\d{4}年\d{1,2}月\d{1,2}日)', text)
# The amount must contain a decimal point; a leading yen sign is optional.
invoice_amount = re.search(r'合计[:：]?\s*¥?(\d+\.\d+)', text)
invoice_type = '增值税专用发票'  # The invoice type is fixed.

# (4) Print the raw OCR output for inspection.
print(text)